In [ ]:
# A __future__ import must be the first statement of a module; keeping it on
# top lets this cell also run unchanged when the notebook is exported to a script.
from __future__ import print_function

import os

import numpy as np
import pandas as pd
from ipywidgets import VBox, HBox

# State lookup table. Later cells read its 'FIPS', 'USPS' and 'Name' columns.
# The relative path is resolved against the current working directory.
codes = pd.read_csv(os.path.abspath('../data_files/state_codes.csv'))
In [ ]:
# Make sure the Pollster client is importable, installing the pinned version
# on demand when it is missing.
try:
    from pollster import Pollster
except ImportError:
    print('Pollster not found. Installing Pollster..')
    import subprocess
    import sys
    try:
        # pip has no supported in-process API (pip.main was removed in pip 10).
        # Running "python -m pip" with the current interpreter installs the
        # package into the environment this kernel is actually using.
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pollster==0.1.6'])
    except (subprocess.CalledProcessError, OSError):
        # Non-zero pip exit status, or the interpreter could not be launched.
        print("The pip installation failed. Please manually install Pollster and re-run this notebook.")
In [ ]:
def get_candidate_data(question):
    """Extract the Clinton / Trump / other / undecided values of one poll question.

    Only the first entry of ``question['subpopulations']`` is read. Each of its
    ``responses`` is classified by ``last_name`` ('Clinton' or 'Trump'), then by
    ``choice`` ('Undecided'); every remaining response is counted as "other".

    Parameters
    ----------
    question : dict
        Mapping with a ``'subpopulations'`` list whose items carry a
        ``'responses'`` list of dicts with ``'value'`` and, optionally,
        ``'last_name'`` / ``'choice'`` entries.

    Returns
    -------
    tuple
        ``(clinton, trump, other, undecided)``. All values are ``0.`` when the
        question has no subpopulation.
    """
    clinton, trump, undecided, other = 0., 0., 0., 0.
    subpopulations = question['subpopulations']
    if not subpopulations:
        # Nothing to read: report "no data" instead of raising IndexError.
        return clinton, trump, other, undecided
    for response in subpopulations[0]['responses']:
        # A missing (None) value is treated as 0 so that it can be summed.
        value = response['value'] or 0.
        if response.get('last_name') == 'Clinton':
            clinton = value
        elif response.get('last_name') == 'Trump':
            trump = value
        elif response.get('choice') == 'Undecided':
            undecided = value
        else:
            # Several responses can fall in this bucket (third-party
            # candidates, "Other", ...): accumulate them rather than keeping
            # only the last one seen.
            other += value
    return clinton, trump, other, undecided
def get_row(question, partisan='Nonpartisan', end_date='2016-06-21'):
    """Build the table row describing one 2016 presidential poll question.

    Parameters
    ----------
    question : dict
        Poll question; its 'topic', 'name' and 'state' entries are read here
        and the whole mapping is handed to ``get_candidate_data``.
    partisan : str
        Stored unchanged in the 'Partisan' field.
    end_date : str
        Converted with ``np.datetime64`` and stored in the 'Date' field.

    Returns
    -------
    list of dict or None
        A one-element list holding the row, or ``None`` when the topic does
        not contain both '2016' and 'Presidential'.
    """
    topic = question['topic']
    if not ('2016' in topic and 'Presidential' in topic):
        return None

    clinton_pct, trump_pct, other_pct, undecided_pct = get_candidate_data(question)
    row = {
        'Name': question['name'],
        'Partisan': partisan,
        'State': question['state'],
        'Date': np.datetime64(end_date),
        'Trump': trump_pct,
        'Clinton': clinton_pct,
        'Other': other_pct,
        'Undecided': undecided_pct,
    }
    return [row]
def analyze_polls(polls):
    """Add every matching question of ``polls`` to the global ``data`` table.

    Each poll must expose ``questions``, ``partisan`` and ``end_date``
    attributes. Questions for which ``get_row`` returns ``None`` are skipped.

    The rows are gathered first and concatenated once: ``DataFrame.append``
    no longer exists in pandas 2.x, and growing a frame row by row copies the
    whole table on every iteration.

    Parameters
    ----------
    polls : iterable
        Poll objects to scan.

    Returns
    -------
    None
        The module-level ``data`` frame is rebound to the extended table.
    """
    global data
    rows = []
    for poll in polls:
        for question in poll.questions:
            resp = get_row(question, partisan=poll.partisan, end_date=poll.end_date)
            if resp is not None:
                rows.extend(resp)
    if rows:
        # Reuse the existing column order so both frames line up exactly;
        # the index is renumbered because it carries no meaning here.
        new_rows = pd.DataFrame(rows, columns=data.columns)
        data = pd.concat([data, new_rows], ignore_index=True)
    return
In [ ]:
# Only a failed import means "Pollster is not installed". Network or parsing
# errors raised while downloading must surface as themselves instead of being
# reported as a missing package.
try:
    from pollster import Pollster
except ImportError:
    raise ValueError('Please install Pollster and run the functions above')

pollster = Pollster()
# Getting data from Pollster. This might take a second.
raw_data = pollster.charts(topic='2016-president')
# Empty table that analyze_polls() extends through the global name `data`.
data = pd.DataFrame(columns=['Name', 'Partisan', 'State', 'Date', 'Trump', 'Clinton', 'Other',
                             'Undecided'])
for chart in raw_data:
    analyze_polls(chart.polls())
In [ ]:
def get_state_party(code):
    """Return which party leads the most recent poll of a state.

    Parameters
    ----------
    code
        Value looked up in the 'FIPS' column of ``codes``; the matching 'USPS'
        entry is compared with the 'State' column of ``data``.

    Returns
    -------
    str or None
        'Republican' when Trump's value is strictly greater than Clinton's in
        the row with the latest 'Date', 'Democrat' otherwise (ties included).
        ``None`` when the state has no row, or no row in which both values
        are positive.
    """
    usps = codes.loc[codes['FIPS'] == code, 'USPS'].values[0]
    in_state = data['State'] == usps
    if not in_state.any():
        return None

    both_positive = (data['Trump'] > 0.) & (data['Clinton'] > 0.)
    ordered = data[in_state & both_positive].sort_values(by='Date')
    if ordered.shape[0] == 0:
        return None

    latest = ordered.iloc[-1]
    if latest['Trump'] > latest['Clinton']:
        return 'Republican'
    return 'Democrat'
def get_color_data():
    """Map every value of ``codes['FIPS']`` to the result of ``get_state_party``.

    Returns
    -------
    dict
        ``{fips: 'Republican' | 'Democrat' | None}``.
    """
    return {fips: get_state_party(fips) for fips in codes['FIPS']}
def get_state_data(code):
    """Return the polls of one state, ordered by date.

    Parameters
    ----------
    code
        Value looked up in the 'FIPS' column of ``codes``; the matching 'USPS'
        entry is compared with the 'State' column of ``data``.

    Returns
    -------
    pandas.DataFrame or None
        Rows of ``data`` for that state in which both 'Trump' and 'Clinton'
        are positive, sorted by 'Date' (possibly empty). ``None`` when the
        state has no row at all.
    """
    usps = codes.loc[codes['FIPS'] == code, 'USPS'].values[0]
    in_state = data['State'] == usps
    if not in_state.any():
        return None
    both_positive = (data['Trump'] > 0.) & (data['Clinton'] > 0.)
    return data[in_state & both_positive].sort_values(by='Date')
In [ ]:
from bqplot import *
from ipywidgets import Layout
In [ ]:
# Per-state time-series chart: dates on x, percentages on y.
dt_x = DateScale()
sc_y = LinearScale()
ax_x = Axis(scale=dt_x, label='Date')
ax_y = Axis(scale=sc_y, orientation='vertical', label='Percentage')

# The mark starts without data; two colors are provided for its two series.
time_series = Lines(
    scales={'x': dt_x, 'y': sc_y},
    colors=['#E91D0E', '#2aa1ec'],
    marker='circle'
)

ts_fig = Figure(
    title='General Election - State Polls',
    marks=[time_series],
    axes=[ax_x, ax_y],
    layout=Layout(min_width='650px', min_height='400px')
)
In [ ]:
# State-level map colored by the party returned for each FIPS code.
sc_geo = AlbersUSA()
sc_c1 = OrdinalColorScale(domain=['Democrat', 'Republican'], colors=['#2aa1ec', '#E91D0E'])
axis = ColorAxis(scale=sc_c1)

color_data = get_color_data()
# Keyword arguments forwarded to Map(); 'default_color' is the fallback color.
map_styles = dict(
    color=color_data,
    scales=dict(projection=sc_geo, color=sc_c1),
    colors=dict(default_color='Grey')
)

# The time-series figure is attached as the map's tooltip.
states_map = Map(map_data=topo_load('map_data/USStatesMap.json'), tooltip=ts_fig, **map_styles)
map_fig = Figure(marks=[states_map], axes=[axis], title='General Election Polls - State Wise')
In [ ]:
def hover_callback(name, value):
    """Refresh the state time-series tooltip for the hovered map element.

    Parameters
    ----------
    name
        Unused; accepted because the callback is invoked with two arguments.
    value : dict
        Event payload; ``value['data']['id']`` is used as the state's FIPS code.

    The figure title is updated before looking at the polls, so that hovering
    a state without usable polls no longer shows the title of the previously
    hovered state above an empty chart.
    """
    fips = value['data']['id']
    polls = get_state_data(fips)
    state_name = codes[codes['FIPS'] == fips]['Name'].values[0]
    ts_fig.title = str(state_name) + ' Polls - Presidential Election'
    if polls is None or polls.shape[0] == 0:
        # No usable poll for this state: collapse the lines to a single zero.
        time_series.y = [0.]
        return
    time_series.x = polls['Date'].values.astype(np.datetime64)
    # Series order matches the mark's colors: Trump first, Clinton second.
    time_series.y = [polls['Trump'].values, polls['Clinton'].values]
In [ ]:
# Register hover_callback so it runs for hover events on the state map.
states_map.on_hover(hover_callback)
In [ ]:
# National polls ('US' rows) in which both candidates have a positive value.
national = data.loc[
    (data['State'] == 'US') & (data['Trump'] > 0.) & (data['Clinton'] > 0.)
].sort_values(by='Date')
national_dates = national['Date'].values.astype(np.datetime64)

# NOTE: dt_x, sc_y, ax_x and ax_y are rebound here to fresh objects used only
# by the national figure.
dt_x = DateScale()
sc_y = LinearScale()
ax_x = Axis(scale=dt_x, label='Date', tick_format='%b-%Y', num_ticks=8)
ax_y = Axis(scale=sc_y, orientation='vertical', label='Percentage')

clinton_scatter = Scatter(
    x=national_dates,
    y=national['Clinton'],
    scales={'x': dt_x, 'y': sc_y},
    colors=['#2aa1ec']
)
trump_scatter = Scatter(
    x=national_dates,
    y=national['Trump'],
    scales={'x': dt_x, 'y': sc_y},
    colors=['#E91D0E']
)

scat_fig = Figure(
    title='General Election - National Polls',
    marks=[clinton_scatter, trump_scatter],
    axes=[ax_x, ax_y]
)
In [ ]:
# Stack the state map figure above the national scatter figure; as the last
# expression of the cell, the resulting box is what the notebook displays.
VBox([map_fig, scat_fig])
In [ ]:
# County-level 2008 results. Later cells read the 'FIPS', 'Obama', 'McCain',
# 'Obama %' and 'McCain %' columns. The relative path is resolved against the
# current working directory.
county_data = pd.read_csv(os.path.abspath('../data_files/2008-election-results.csv'))
In [ ]:
# One label per row of county_data, initialised to 'McCain' everywhere.
winner = np.asarray(['McCain'] * len(county_data))
In [ ]:
# Overwrite the label wherever Obama's value is strictly greater than McCain's.
winner[county_data['Obama'].gt(county_data['McCain']).values] = 'Obama'
In [ ]:
# County-level map colored by the 2008 winner label of each county.
sc_geo_county = AlbersUSA()
sc_c1_county = OrdinalColorScale(domain=['McCain', 'Obama'], colors=['Red', 'DeepSkyBlue'])
axis_county = ColorAxis(scale=sc_c1_county)

# Integer FIPS code -> winner label, paired row by row.
color_data_county = {
    fips: label
    for fips, label in zip(county_data['FIPS'].values.astype(int), winner)
}
# Keyword arguments forwarded to Map(); 'default_color' is the fallback color.
map_styles_county = dict(
    color=color_data_county,
    scales=dict(projection=sc_geo_county, color=sc_c1_county),
    colors=dict(default_color='Grey')
)

county_map = Map(map_data=topo_load('map_data/USCountiesMap.json'), **map_styles_county)
county_fig = Figure(
    title='US Elections 2008 - Example',
    marks=[county_map],
    axes=[axis_county],
    layout=Layout(min_width='800px', min_height='550px')
)
In [ ]:
# Bar chart of both candidates' vote percentages, on a fixed 0-100 axis.
names_sc = OrdinalScale(domain=['Obama', 'McCain'])
vote_sc_y = LinearScale(min=0, max=100.)
names_ax = Axis(scale=names_sc, label='Candidate')
vote_ax = Axis(scale=vote_sc_y, orientation='vertical', label='Percentage')

# The bars start without data.
vote_bars = Bars(
    scales={'x': names_sc, 'y': vote_sc_y},
    colors=['#2aa1ec', '#E91D0E']
)
bar_fig = Figure(
    title='Vote Margin',
    marks=[vote_bars],
    axes=[names_ax, vote_ax],
    layout=Layout(min_width='600px', min_height='400px')
)
In [ ]:
def county_hover(name, value):
    """Refresh the vote-share bar chart for the hovered county.

    Parameters
    ----------
    name
        Unused; accepted because the callback is invoked with two arguments.
    value : dict
        Event payload; ``value['data']['id']`` is matched against the 'FIPS'
        column of ``county_data`` and ``value['data']['name']`` is shown in
        the title.

    When no row matches, the title is cleared and both bars are set to zero.
    """
    fips = value['data']['id']
    matches = county_data[county_data['FIPS'] == fips]
    if matches.shape[0] == 0:
        bar_fig.title = ''
        vote_bars.y = [0., 0.]
        return

    # Only the first matching row is used.
    obama_pct = float(matches['Obama %'].values[0])
    mccain_pct = float(matches['McCain %'].values[0])
    vote_bars.x = ['Obama', 'McCain']
    vote_bars.y = [obama_pct, mccain_pct]
    bar_fig.title = 'Vote % - ' + value['data']['name']
# Register county_hover so it runs for hover events on the county map.
county_map.on_hover(county_hover)
# Use the bar-chart figure as the county map's tooltip.
county_map.tooltip = bar_fig
In [ ]:
# Last expression of the cell: the county figure, shown as the cell output.
county_fig